true
setwd("/Volumes/home-1/greally-lab/Claudia_Andrew/Lu_et_al/DESeq2/")

# load in libraries 
library(DESeq2)
library(EDASeq)
library(matrixStats)
library(RUVSeq)
library(qvalue)
library(genefilter)
library(RColorBrewer)
library(pheatmap)
library(UpSetR)
library(RFmarkerDetector)
library(ggplot2)
library(ggthemes)
library(VennDiagram)
library(GeneOverlap)

# set options
options(scipen=999, stringsAsFactors = FALSE)

For the revision of “A cellular stress response induced by the CRISPR/dCas9 activation system is not heritable through cell divisions”, Lu et al 2019 data was analyzed to examine if a similar stress response was observed in their experiments. Specifically, they transfected Cas9 to knockout STAT3 in SKOV3 cells (ovarian cancer cell line). More information at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE134375

Not only did they compare the wild type SKOV3 cell line with KO-STAT3 via CRISPR cells BUT also examined transfection with Cas9 w/o gRNA vs. the CRIPSR KO cell line in a separate experiment. While I will look at the data quality from both experimental designs, I’m going to be assessing the difference between WT cells and the Cas9 control, which of note are from different experimental timings.

First we read in the data from count data.

# reading in and merging the counts

## selecting the files ot read in
dir_file <- "../Mapped_STAR_79/"
files <- grep(pattern = "ReadsPerGene.out.tab", x = list.files(path = dir_file), value = TRUE)

files
##  [1] "SKOV3_Cas9_rep1ReadsPerGene.out.tab"   
##  [2] "SKOV3_Cas9_rep2ReadsPerGene.out.tab"   
##  [3] "SKOV3_Cas9_rep3ReadsPerGene.out.tab"   
##  [4] "SKOV3_STAT3_rep1ReadsPerGene.out.tab"  
##  [5] "SKOV3_STAT3_rep2ReadsPerGene.out.tab"  
##  [6] "SKOV3_STAT3_rep3ReadsPerGene.out.tab"  
##  [7] "SKOV3_STAT3KO_rep1ReadsPerGene.out.tab"
##  [8] "SKOV3_STAT3KO_rep2ReadsPerGene.out.tab"
##  [9] "SKOV3_STAT3KO_rep3ReadsPerGene.out.tab"
## [10] "SKOV3_WT_rep1ReadsPerGene.out.tab"     
## [11] "SKOV3_WT_rep2ReadsPerGene.out.tab"     
## [12] "SKOV3_WT_rep3ReadsPerGene.out.tab"
list_counts <- list()
for (i in 1:length(files)){
  list_counts[[i]] <- read.table(paste(dir_file,files[i], sep =""))
  if (i < 2) {
    df_counts <- list_counts[[1]][,c(1,4)]
  }
  else {
    df_counts <- merge(df_counts, list_counts[[i]][,c(1,4)], by = "V1")
  }
}
dim(df_counts) #  60700    13
## [1] 60700    13
tail(df_counts)
##                   V1    V4.x    V4.y    V4.x    V4.y    V4.x    V4.y    V4.x
## 60695     ERCC-00170       0       0       0       0       0       0       0
## 60696     ERCC-00171       0       0       0       0       0       0       0
## 60697    N_ambiguous 1162412 1203710 1327152  713976  849686  684912 1234605
## 60698 N_multimapping 2331780 2438977 3175167 6147027 1845998 1429559 2341813
## 60699    N_noFeature  800405  830146  976053 1518215 1499341 1110702 1228134
## 60700     N_unmapped 3639234 4570481 4429723 5350739 4937477 3762082 4387943
##          V4.y    V4.x    V4.y    V4.x    V4.y
## 60695       0       0       0       0       0
## 60696       0       0       0       0       0
## 60697 1232984  975537  978454  781594  793967
## 60698 2353200 7716074 2333720 1880173 2002903
## 60699 1194135  998451 1386106 1066803 1163040
## 60700 3530238 3528294 3545296 4937216 3763404
## remove the ambiguous, multimapp, no feature, and unmapped read totals
df_counts <- df_counts[-c(60697:60700),]
rownames(df_counts) <- df_counts[,1]
df_counts <- df_counts[,-1]

colnames(df_counts) <- c("Cas9_1", "Cas9_2", "Cas9_3", "STAT3_1", "STAT3_2",
                         "STAT3_3", "KOSTAT3_1", "KOSTAT3_2","KOSTAT3_3",
                         "WT_1", "WT_2", "WT3")
head(df_counts)
##                 Cas9_1 Cas9_2 Cas9_3 STAT3_1 STAT3_2 STAT3_3 KOSTAT3_1
## ENSG00000000003    429    336    697    1038    1240     914      1526
## ENSG00000000005      0      0      0       0       0       0         0
## ENSG00000000419    791    731    881    1240    1543    1145      1065
## ENSG00000000457    180    165    196     130     182     115       140
## ENSG00000000460    399    417    733     443     517     500       411
## ENSG00000000938      0      0      0       0       0       1         3
##                 KOSTAT3_2 KOSTAT3_3 WT_1 WT_2  WT3
## ENSG00000000003      1565      1185  747  682  758
## ENSG00000000005         0         0    0    0    0
## ENSG00000000419      1092       751 1707 1313 1498
## ENSG00000000457       126       104  330  222  246
## ENSG00000000460       440       327 1016  798  874
## ENSG00000000938         4         5    0    0    0

Next we filter the RNAs to be analzyed. First, we apply a simple filter for only those RNAs that are expressed at high levels. The RNA must have at least 5 counts in four of the samples, thus allowing only genes expressed by only one treatment group to be retained. Next, we filter for protein coding genes only or protein coding and long non-coding RNAs.

# expression filter
idx_filt_exp_com <- apply(df_counts, 1, function(x) length(x[x>5])>=4) 
head(idx_filt_exp_com)
## ENSG00000000003 ENSG00000000005 ENSG00000000419 ENSG00000000457 ENSG00000000460 
##            TRUE           FALSE            TRUE            TRUE            TRUE 
## ENSG00000000938 
##           FALSE
filtered_com <- df_counts[idx_filt_exp_com,]
dim(filtered_com) # 17,687     12
## [1] 17687    12
# filter for only protein coding RNAs
prot_ensg_ID <- read.table("../../../indexes/Hg38_rel79_ERCC/prot_ENSG.txt")
dim(prot_ensg_ID) # 22002     1
## [1] 22002     1
filterd_com_pc <- filtered_com[
  rownames(filtered_com) %in% prot_ensg_ID$V1,]
dim(filterd_com_pc) # 13721    6
## [1] 13721    12

Let’s look at the PCA and RLE plots.

# resorting the columns so that experiment samples are next to each other
filterd_com_pc <- filterd_com_pc[,c(10:12,4:6,1:3,7:9)]
head(filterd_com_pc)
##                 WT_1 WT_2  WT3 STAT3_1 STAT3_2 STAT3_3 Cas9_1 Cas9_2 Cas9_3
## ENSG00000000003  747  682  758    1038    1240     914    429    336    697
## ENSG00000000419 1707 1313 1498    1240    1543    1145    791    731    881
## ENSG00000000457  330  222  246     130     182     115    180    165    196
## ENSG00000000460 1016  798  874     443     517     500    399    417    733
## ENSG00000000971   29   33   37      21      41      19     17     11     18
## ENSG00000001036 3079 2266 2301    1906    2260    1851   2475   2557   3624
##                 KOSTAT3_1 KOSTAT3_2 KOSTAT3_3
## ENSG00000000003      1526      1565      1185
## ENSG00000000419      1065      1092       751
## ENSG00000000457       140       126       104
## ENSG00000000460       411       440       327
## ENSG00000000971       165       171       123
## ENSG00000001036      3272      3434      2498
# set a factor for different treatments
treatments_com <- as.factor(c(rep(c("Ctrl","STAT3"), each=3),rep(c("CRISPR","STAT3"), each=3))) 
treatments_com <- relevel(treatments_com, c("Ctrl"))
experiment_com <- as.factor(rep(c("Exp1", "Exp2"),each=6))

# create expression sets 
eset_pc_com <- newSeqExpressionSet(as.matrix(filterd_com_pc),
                                  phenoData = data.frame(treatments_com, 
                                                   row.names=colnames(filterd_com_pc)))

# choose a color set
colors_com <- brewer.pal(6, "Dark2")
colors <- brewer.pal(3, "Dark2")


# Make RLE plots
plotRLE(eset_pc_com, outline=FALSE, ylim=c(-4, 4), col=colors[treatments_com],
        main="Protein coding RNAs before normalization") 

limma::plotMDS(counts(eset_pc_com), dim=c(2,3))

# Make PCA plots
plotPCA(eset_pc_com, col=colors[treatments_com], cex=1.2, 
        main = "Protein coding RNAs before normalization")

plotPCA(eset_pc_com, k=3, col=colors[treatments_com], cex=1.2, 
        main="Protein coding RNAs before normalization") 

plotPCA(eset_pc_com, k=3, col=colors[experiment_com], cex=1.2, 
        main="Protein coding RNAs before normalization") 

The STAT3 KO treatment groups cluster together as expected in PC1. And the two experiments are defined by PC2.

Next we normalize based on housekeeping gene expression. House keeping genes were identified by a previous study “Human housekeeping genes revisited” E. Eisenberg and E.Y. Levanon, Trends in Genetics, 29 (2013) and a list is avaialble for download at (https://www.tau.ac.il/~elieis/HKG/). We took only the bottom quartile variance house keeping genes to use for normalization.

# read in house keeping genes 
HK_genes <- read.table("../../CRISPR_Proj_combined/HK_ensembl_ID.txt")
dim(HK_genes) # 4202    1
## [1] 4202    1
# grab the HK genes from RNAs being analyzed 
HK_pc_com <- filterd_com_pc[which(rownames(filterd_com_pc) %in% HK_genes[,1]),]
dim(HK_pc_com) # 3755    12
## [1] 3755   12
# examine the variance of the HK genes and take only the bottom 1000 genes to normalize with
## for protein coding RNAs only
HK_pc_com_rsd <- apply(as.matrix(HK_pc_com), 1, rsd)
boxplot(HK_pc_com_rsd)

summary(HK_pc_com_rsd)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.24   29.12   38.76   40.48   48.58  127.20
HK_pc_lowRSD <- sort(HK_pc_com_rsd)[1:1000]

# Normalize using the house keeping genes
eset_pc_norm_com <- RUVg(eset_pc_com, names(HK_pc_lowRSD), k=1) 

# The weights have been added to the phenotype data
pData(eset_pc_norm_com)
##           treatments_com         W_1
## WT_1                Ctrl -0.12650387
## WT_2                Ctrl -0.20117620
## WT3                 Ctrl -0.24353492
## STAT3_1            STAT3 -0.40425695
## STAT3_2            STAT3 -0.26679757
## STAT3_3            STAT3 -0.37549803
## Cas9_1            CRISPR  0.28056224
## Cas9_2            CRISPR  0.30645156
## Cas9_3            CRISPR  0.45333842
## KOSTAT3_1          STAT3  0.25041828
## KOSTAT3_2          STAT3  0.24804424
## KOSTAT3_3          STAT3  0.07895279
# Make RLE plots
plotRLE(eset_pc_norm_com, outline=FALSE, ylim=c(-4, 4), col=colors[treatments_com],
        main="Protein coding RNAs after normalization") 

# Make PCA plots
plotPCA(eset_pc_norm_com, col=colors[treatments_com], cex=1.2, 
        main = "Protein coding RNAs after normalization")

plotPCA(eset_pc_norm_com, k=3, col=colors[treatments_com], cex=1.2, 
        main="Protein coding RNAs after normalization") 

The normalized data looks more askew than before normalization. Therefore, we will perform DE gene analyis on the non-RUVg normalized data set eset_pc_com.

Next, we perform the differential expression among the different treatments

#adding the replicates information to the pData
pData(eset_pc_com) <- cbind(pData(eset_pc_com), experiment_com)

# convert the expression set to a DESeq object

# WT vs. STAT3 control
treatments_ctrl <- as.factor(rep(c("Ctrl","STAT3"), each=3)) 
pdata_ws <- pData(eset_pc_com)[1:6,]
pdata_ws$treatments_ctrl <- as.factor(as.character(pdata_ws$treatments_com))
dds_wt_Stat3 <- DESeqDataSetFromMatrix(countData = counts(eset_pc_com)[,1:6], 
                                    colData = pdata_ws, 
                                    design = ~treatments_ctrl)
# Cas9 vs. STATKO
treatments_crisp <- as.factor(rep(c("CRISPR","STAT3"), each=3)) 
treatments_crisp <- relevel(treatments_crisp, c("CRISPR"))
pdata_cs <- pData(eset_pc_com)[7:12,]
pdata_cs$treatments_crisp <- as.factor(as.character(pdata_cs$treatments_com))
dds_CRISPR_Stat3 <- DESeqDataSetFromMatrix(countData = as.matrix(counts(eset_pc_com)[,7:12]), 
                                    colData = pdata_cs, 
                                    design = ~treatments_crisp)

# WT vs. Cas9 Control
treatments_wt <- as.factor(rep(c("Ctrl","CRISPR"), each=3)) 
treatments_wt <- relevel(treatments_wt, c("Ctrl"))
pdata_wt <- pData(eset_pc_com)[c(1:3,7:9),]
pdata_wt$treatments_wt <- as.factor(as.character(pdata_wt$treatments_com))
pdata_wt$treatments_wt <- relevel(pdata_wt$treatments_wt, "Ctrl")

dds_wt_CRISPR <- DESeqDataSetFromMatrix(countData = counts(eset_pc_com)[,c(1:3,7:9)], 
                                    colData = pdata_wt, 
                                    design = ~treatments_wt)

# Run DESeq Wald tests
dds_wt_Stat3 <- DESeq(dds_wt_Stat3)
dds_CRISPR_Stat3 <- DESeq(dds_CRISPR_Stat3)
dds_wt_CRISPR <- DESeq(dds_wt_CRISPR)


# generate results among the different treatments_com and set a log fold change threshold of 1
res_wt_Stat3 <- results(dds_wt_Stat3, lfcThreshold=1, altHypothesis="greaterAbs", 
                       contrast = c("treatments_ctrl", "STAT3", "Ctrl"), alpha=0.05)

res_CRISPR_Stat3 <- results(dds_CRISPR_Stat3, lfcThreshold=1, altHypothesis="greaterAbs", 
                       contrast = c("treatments_crisp", "STAT3", "CRISPR"), alpha=0.05)

res_wt_CRISPR <- results(dds_wt_CRISPR, lfcThreshold=1, altHypothesis="greaterAbs", 
                       contrast = c("treatments_wt", "CRISPR", "Ctrl"), alpha=0.05)

# draw MA plots of results 
## draw horizontal lines for log fold change threshold
drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2)
ylim<-c(-8,8)

##draw the MA plots
DESeq2::plotMA(res_wt_Stat3, 
               main="WT vs STAT3"); drawLines()

DESeq2::plotMA(res_CRISPR_Stat3, 
               main="CRISPR vs STAT3-KO"); drawLines()

DESeq2::plotMA(res_wt_CRISPR,
               main="WT vs. CRISPR", ylim=ylim); drawLines()

# graphs show that STAT3 KO affects many genes 

# looking at the summaries
summary(res_wt_Stat3) # 1921 up and 1496 down
## 
## out of 13721 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 1.00 (up)    : 1921, 14%
## LFC < -1.00 (down) : 1496, 11%
## outliers [1]       : 2, 0.015%
## low counts [2]     : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(res_CRISPR_Stat3) # 2435 up and 1765 down
## 
## out of 13721 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 1.00 (up)    : 2435, 18%
## LFC < -1.00 (down) : 1765, 13%
## outliers [1]       : 3, 0.022%
## low counts [2]     : 0, 0%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(res_wt_CRISPR) # 415 up and 1294
## 
## out of 13655 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 1.00 (up)    : 415, 3%
## LFC < -1.00 (down) : 1294, 9.5%
## outliers [1]       : 0, 0%
## low counts [2]     : 265, 1.9%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# grabbing the ENSG IDs from the differentially expressed genes 
res_wt_Stat3_nona <- res_wt_Stat3[!is.na(res_wt_Stat3$padj),]
res_wt_Stat3_nona_IDs <- rownames(res_wt_Stat3_nona)[res_wt_Stat3_nona$padj<0.05]
length(res_wt_Stat3_nona_IDs) #3417
## [1] 3417
num_WTvSTAT3 <- length(res_wt_Stat3_nona_IDs) 

res_CRISPR_Stat3_nona <- res_CRISPR_Stat3[!is.na(res_CRISPR_Stat3$padj),]
res_CRISPR_Stat3_nona_IDs <- rownames(res_CRISPR_Stat3_nona)[res_CRISPR_Stat3_nona$padj<0.05]
length(res_CRISPR_Stat3_nona_IDs) #4200
## [1] 4200
num_CRISPRvSTAT3 <- length(res_CRISPR_Stat3_nona_IDs) 

res_wt_CRISPR_nona <- res_wt_CRISPR[!is.na(res_wt_CRISPR$padj),]
res_wt_CRISPR_nona_IDs <- rownames(res_wt_CRISPR_nona)[res_wt_CRISPR_nona$padj<0.05]
length(res_wt_CRISPR_nona_IDs) #1709
## [1] 1709
num_WTvCRISPR <- length(res_wt_CRISPR_nona_IDs) 

# what's the overlap amongst the three comparisons?
WTvCRISPR_over_WTvSTAT3_IDs <- res_wt_CRISPR_nona_IDs[(res_wt_CRISPR_nona_IDs %in% res_wt_Stat3_nona_IDs)]
WTvCRISPR_over_CRISPRvSTAT3_IDs <- res_wt_CRISPR_nona_IDs[(res_wt_CRISPR_nona_IDs %in% res_CRISPR_Stat3_nona_IDs)]

sum(WTvCRISPR_over_WTvSTAT3_IDs %in% WTvCRISPR_over_CRISPRvSTAT3_IDs)
## [1] 306

Let’s make an upset plot of the overlaps between the three DEG comparisons.

sum(res_wt_Stat3_nona_IDs %in% res_CRISPR_Stat3_nona_IDs) # 2948 are the same 
## [1] 2948
sum(res_wt_CRISPR_nona_IDs %in% res_wt_Stat3_nona_IDs) # 399 are the same 
## [1] 399
sum(res_wt_CRISPR_nona_IDs %in% res_CRISPR_Stat3_nona_IDs) # 434 are the same 
## [1] 434
# Setting up the dataframe for the upset plot. 
upset_wt_STAT3 <- data.frame(Gene=res_wt_Stat3_nona_IDs,
                                                  Wt_STAT3=1)
upset_wt_CRISPR <- data.frame(Gene=res_wt_CRISPR_nona_IDs,
                                                  WT_CRIPSR=1)
upset_CRISPR_STAT3 <- data.frame(Gene=res_CRISPR_Stat3_nona_IDs,
                                                  CRIPSR_STAT3=1)
upsetdf_merge_1 <- merge(upset_wt_STAT3, 
                            upset_wt_CRISPR,
                            by="Gene", all=T)
upsetdf_merge_2 <- merge(upsetdf_merge_1, upset_CRISPR_STAT3,
                            by="Gene", all=T)
dim(upsetdf_merge_2) # 189   3
## [1] 5851    4
upsetdf_final<- replace(upsetdf_merge_2,is.na(upsetdf_merge_2),0)

upset(upsetdf_final, nsets = 3, number.angles = 0, point.size = 3.5, 
      line.size = 1.5, mainbar.y.label = "DE Gene Intersections", 
      sets.x.label = "# of DE Genes", text.scale = c(1.3, 1, 1, 1, 1, 1), order.by = "freq")

Comparing our study wtih the WT vs CRISPR

# read in our 97 gene list
res_us_com <- read.table("../../CRISPR_Proj_combined/DEG_CRISPR_CD34_combined.txt", header=T)
dim(res_us_com)
## [1] 97  2
sum(res_wt_CRISPR_nona_IDs %in% res_us_com$ENSEMBL) # 13
## [1] 13
sum(res_wt_Stat3_nona_IDs %in% res_us_com$ENSEMBL) # 34 
## [1] 34
sum(res_CRISPR_Stat3_nona_IDs %in% res_us_com$ENSEMBL) # 43
## [1] 43
# GOI = genes of interest
#ENSG00000171223      JUNB
#ENSG00000104856      RELB
# ENSG00000175197     DDIT3

GOI <- res_us_com[which(res_us_com$SYMBOL %in% c("JUNB", "RELB", "DDIT3")),]
res_wt_CRISPR_nona_IDs[(res_wt_CRISPR_nona_IDs %in% GOI$ENSEMBL)] # 2
## [1] "ENSG00000104856" "ENSG00000171223"
res_wt_Stat3_nona_IDs[(res_wt_Stat3_nona_IDs %in% GOI$ENSEMBL)] # 2
## [1] "ENSG00000171223" "ENSG00000175197"
res_CRISPR_Stat3_nona_IDs[(res_CRISPR_Stat3_nona_IDs %in% GOI$ENSEMBL)] # 2
## [1] "ENSG00000171223" "ENSG00000175197"
# plot the GOI genes
class(GOI$ENSEMBL[1])               
## [1] "factor"
#plotCounts(dds_wt_Stat3, gene=GOI$ENSEMBL[1], intgroup=c("treatments_ctrl"))
#plotCounts(dds_wt_Stat3, gene=GOI$ENSEMBL[2], intgroup=c("treatments_ctrl"))
#plotCounts(dds_wt_Stat3, gene=GOI$ENSEMBL[3], intgroup=c("treatments_ctrl"))

#plotCounts(dds_CRISPR_Stat3, gene=GOI$ENSEMBL[1], intgroup=c("treatments_crisp"))
plotCounts(dds_CRISPR_Stat3, gene="ENSG00000171223", intgroup=c("treatments_crisp"))

plotCounts(dds_CRISPR_Stat3, gene="ENSG00000175197", intgroup=c("treatments_crisp"))

plotCounts(dds_wt_CRISPR, gene="ENSG00000104856", intgroup=c("treatments_wt"), 
           main="RELB - WT v Cas9-ctrl")

plotCounts(dds_wt_CRISPR, gene="ENSG00000171223", intgroup=c("treatments_wt"),
           main="JUNB - WT v Cas9-ctrl")

plotCounts(dds_wt_CRISPR, gene="ENSG00000175197", intgroup=c("treatments_wt"),
           main="DDIT3 - WT v Cas9-ctrl") #DDIT3 not sig :-/ 

# Make pdfs of ggplot versions:
# pdf("VennD_David_v_Lu.pdf", width=6, height = 4, family="ArialMT")
data_RELB <- plotCounts(dds_wt_CRISPR, gene="ENSG00000104856", intgroup=c("treatments_wt"), returnData=TRUE)
data_JUN <- plotCounts(dds_wt_CRISPR, gene="ENSG00000171223", intgroup=c("treatments_wt"), returnData=TRUE)
data_DDIT3 <- plotCounts(dds_wt_CRISPR, gene="ENSG00000175197", intgroup=c("treatments_wt"), returnData=TRUE)

library(scales)

col_blind<- colorblind_pal()(8)
RelB_ggcount <- ggplot(data_RELB, aes(x=treatments_wt, y=count, fill=treatments_wt)) +
  scale_y_log10(limits=c(70,1300)) + 
  scale_fill_manual(values=c(col_blind[2], col_blind[4]))+
  geom_dotplot(binaxis="y", stackdir="center") +
  xlab("Condition")+
  ylab("Raw Counts (log10 scale)")+
  theme_hc()
ggsave(plot = RelB_ggcount, filename = "RelB_ggcount.pdf", width=2.5, height=4, useDingbats=FALSE)

JUNB_ggcount <- ggplot(data_JUN, aes(x=treatments_wt, y=count, fill=treatments_wt)) +
  scale_y_log10(limits=c(70,1300)) +
  scale_fill_manual(values=c(col_blind[2], col_blind[4]))+
  geom_dotplot(binaxis="y", stackdir="center") +
  xlab("Condition")+
  ylab("Raw Counts (log10 scale)")+
  theme_hc()
ggsave(plot = JUNB_ggcount, filename = "JUNB_ggcount.pdf", width=2.5, height=4, useDingbats=FALSE)

DDIT3_ggcount <- ggplot(data_DDIT3, aes(x=treatments_wt, y=count, fill=treatments_wt)) +
  scale_y_log10(limits=c(70,1300)) +
  scale_fill_manual(values=c(col_blind[2], col_blind[4]))+
  geom_dotplot(binaxis="y", stackdir="center") +
  xlab("Condition")+
  ylab("Raw Counts (log10 scale)")+
  theme_hc()
ggsave(plot = DDIT3_ggcount, filename = "DDIT3_ggcount.pdf", width=2.5, height=4, useDingbats=FALSE)

#Get Pvalues for GOI
res_wt_CRISPR_nona[which(rownames(res_wt_CRISPR_nona)%in% GOI$ENSEMBL),]
## log2 fold change (MLE): treatments_wt CRISPR vs Ctrl 
## Wald test p-value: treatments wt CRISPR vs Ctrl 
## DataFrame with 3 rows and 6 columns
##                         baseMean     log2FoldChange             lfcSE
##                        <numeric>          <numeric>         <numeric>
## ENSG00000104856 441.091835359951   1.64915358381492 0.125627290175259
## ENSG00000171223 778.186638381378   1.49148129899352 0.136204319869892
## ENSG00000175197 95.2397402858023 -0.259086537822926 0.212389100277217
##                             stat               pvalue                 padj
##                        <numeric>            <numeric>            <numeric>
## ENSG00000104856 5.16729751082987 2.37503050388141e-07 4.61562531886388e-06
## ENSG00000171223 3.60841197594173 0.000308077005136469  0.00333480282843761
## ENSG00000175197                0                    1                    1
#                                 pvalue                   padj
#                              <numeric>              <numeric>
# ENSG00000104856 0.000000237503050388141 0.00000461562531886388
# ENSG00000171223    0.000308077005136469    0.00333480282843761
# ENSG00000175197                       1                      1

#Get DEGs info for intersecting DAVID genes
# load DAVID gene table 
david_genes <- read.table("DAVID_Genes.txt", header = F, sep="\t")

# overlapping?
sum(res_wt_CRISPR_nona_IDs %in% david_genes$V1) # 10
## [1] 10
sum(res_wt_Stat3_nona_IDs %in% david_genes$V1) # 14
## [1] 11
sum(res_CRISPR_Stat3_nona_IDs %in% david_genes$V1) # 11
## [1] 14
# output table
lu_DEG_intersect <- res_wt_CRISPR_nona[which(rownames(res_wt_CRISPR_nona) %in% david_genes$V1),]
dim(lu_DEG_intersect)
## [1] 27  6
lu_DEG_intersect_sig <- lu_DEG_intersect[lu_DEG_intersect$padj<0.05,]
dim(lu_DEG_intersect_sig)
## [1] 10  6
head(lu_DEG_intersect_sig)
## log2 fold change (MLE): treatments_wt CRISPR vs Ctrl 
## Wald test p-value: treatments wt CRISPR vs Ctrl 
## DataFrame with 6 rows and 6 columns
##                         baseMean    log2FoldChange              lfcSE
##                        <numeric>         <numeric>          <numeric>
## ENSG00000049249  66.880666362956   2.3724822177598  0.285969186396828
## ENSG00000077150 1763.62476343779  1.74809207493027 0.0894486151591179
## ENSG00000101255 963.183166956576  1.48633865064483  0.110651712286853
## ENSG00000104856 441.091835359951  1.64915358381492  0.125627290175259
## ENSG00000105499 61.4240987578971  1.98166103101569  0.284299456092283
## ENSG00000119922 85.0790997335938 -1.77939144727199  0.230831286040293
##                             stat               pvalue                 padj
##                        <numeric>            <numeric>            <numeric>
## ENSG00000049249 4.79940596066622 1.59136935479702e-06 2.64372650877569e-05
## ENSG00000077150 8.36337235181904 6.09504665211042e-17 4.36431415356998e-15
## ENSG00000101255 4.39522028709374 1.10660475054066e-05 0.000155809017978333
## ENSG00000104856 5.16729751082987 2.37503050388141e-07 4.61562531886388e-06
## ENSG00000105499 3.45291209666277 0.000554569686705044   0.0056162982969951
## ENSG00000119922 -3.3764549885839 0.000734263851045054  0.00720805935886603
write.table(lu_DEG_intersect_sig, "lu_DAVID_intersect_sig.txt",append = F, row.names = T, col.names=T, quote=F, sep = "\t")

# is p53 expressed in SKOV3 cells?
plotCounts(dds_wt_CRISPR, gene="ENSG00000141510", intgroup=c("treatments_wt"),
           main="DDIT3 - WT v Cas9-ctrl")

res_wt_CRISPR_nona[which(rownames(res_wt_CRISPR_nona) == "ENSG00000141510"),]
## log2 fold change (MLE): treatments_wt CRISPR vs Ctrl 
## Wald test p-value: treatments wt CRISPR vs Ctrl 
## DataFrame with 1 row and 6 columns
##                         baseMean   log2FoldChange              lfcSE
##                        <numeric>        <numeric>          <numeric>
## ENSG00000141510 2245.75256398372 1.37250343531241 0.0862314239962154
##                             stat               pvalue                padj
##                        <numeric>            <numeric>           <numeric>
## ENSG00000141510 4.31981078416099 1.56163040903836e-05 0.00021443522465069

Figuring out how many prot coding genes analyzed in our analysis

# reading in and merging the counts
dir_claudia <- "../../CRISPR_Proj_combined/"
## selecting the files ot read in
files_claudia <- grep(pattern = "ReadsPerGene.out.tab", x = list.files(path=dir_claudia), value = TRUE)

files_claudia
##  [1] "1A.1ReadsPerGene.out.tab"       "1minus.1ReadsPerGene.out.tab"  
##  [3] "2Aplus.1ReadsPerGene.out.tab"   "2plus.1ReadsPerGene.out.tab"   
##  [5] "3AQ2.1ReadsPerGene.out.tab"     "3plus.1ReadsPerGene.out.tab"   
##  [7] "CD34_1.1ReadsPerGene.out.tab"   "CD34_2.1ReadsPerGene.out.tab"  
##  [9] "CRISPR_1.1ReadsPerGene.out.tab" "CRISPR_2.1ReadsPerGene.out.tab"
## [11] "Ctrl_1.1ReadsPerGene.out.tab"   "Ctrl_2.1ReadsPerGene.out.tab"
list_counts_c <- list()
for (i in 1:length(files_claudia)){
  list_counts_c[[i]] <- read.table(paste(dir_claudia, files_claudia[i],sep=""))
  if (i < 2) {
    df_counts_c <- list_counts_c[[1]][,c(1,4)]
  }
  else {
    df_counts_c <- merge(df_counts_c, list_counts_c[[i]][,c(1,4)], by = "V1")
  }
}
dim(df_counts_c) #  60700    13
## [1] 60700    13
## remove the ambiguous, multimapp, no feature, and unmapped read totals
df_counts_c <- df_counts_c[-c(60697:60700),]
rownames(df_counts_c) <- df_counts_c[,1]
df_counts_c <- df_counts_c[,-1]

colnames(df_counts_c) <- c("Ctrl_1_1", "Ctrl_1_2", "CRISPR_1_1", "CRISPR_1_2", "CD34_1_1",
                         "CD34_1_2", "CD34_2_1", "CD34_2_2", "CRISPR_2_1","CRISPR_2_2",
                         "Ctrl_2_1", "Ctrl_2_2")
head(df_counts_c)
##                 Ctrl_1_1 Ctrl_1_2 CRISPR_1_1 CRISPR_1_2 CD34_1_1 CD34_1_2
## ENSG00000000003     1915     2792       1970       1439     1937     2008
## ENSG00000000005        0        3          0          1        0        0
## ENSG00000000419      601      770       1036        693      873      924
## ENSG00000000457      342      389        397        234      302      352
## ENSG00000000460      637      867        786        504      670      663
## ENSG00000000938        1        0          2          0        1        1
##                 CD34_2_1 CD34_2_2 CRISPR_2_1 CRISPR_2_2 Ctrl_2_1 Ctrl_2_2
## ENSG00000000003     2104     1654       1935       2562     2205     2708
## ENSG00000000005        0        0          1          0        0        1
## ENSG00000000419     1037      774        982       1227      696      762
## ENSG00000000457      341      278        311        453      288      396
## ENSG00000000460      796      571        739        896      683      803
## ENSG00000000938        1        0          7          0        0        0

Next we filter the RNAs to be analzyed. First, we apply a simple filter for only those RNAs that are expressed at high levels. The RNA must have at least 5 counts in four of the samples, thus allowing only genes expressed by only one treatment group to be retained. Next, we filter for protein coding genes only or protein coding and long non-coding RNAs.

# expression filter
idx_filt_exp_com_c <- apply(df_counts_c, 1, function(x) length(x[x>5])>=4) 
head(idx_filt_exp_com_c)
## ENSG00000000003 ENSG00000000005 ENSG00000000419 ENSG00000000457 ENSG00000000460 
##            TRUE           FALSE            TRUE            TRUE            TRUE 
## ENSG00000000938 
##           FALSE
filtered_com_c <- df_counts_c[idx_filt_exp_com_c,]
dim(filtered_com_c) # 19,244     12
## [1] 19244    12
# remove spike ins
spikes_com <- grep("ERCC", rownames(filtered_com_c))
length(spikes_com) # 12
## [1] 12
filterd_noSpike_com_c <- filtered_com_c[-spikes_com,]
dim(filterd_noSpike_com_c) # 19232 12
## [1] 19232    12
# filter for only protein coding RNAs
filterd_noSpike_pc_com_c <- filterd_noSpike_com_c[
  rownames(filterd_noSpike_com_c) %in% prot_ensg_ID$V1,]
dim(filterd_noSpike_pc_com_c) # 14260    6
## [1] 14260    12

Now we know the number of prot coding genes that were analyzed by our study. With this information, we can more stringently test for overlaps of HEK293T significant genes overlapping with those of Lu et al’s SKOV3.

Performing the gene overlap analysis

#Assessing true gene set size
merge_protcoding <- merge(x= filterd_noSpike_pc_com_c, y=filterd_com_pc, by="row.names", all=TRUE)
num_genes_inboth <-nrow(merge_protcoding)

# Performing GeneOverlap analysis on DAVID
#go_david <- newGeneOverlap(david_genes$V1, res_wt_CRISPR_nona_IDs,            genome.size=nrow(prot_ensg_ID))
go_david <- newGeneOverlap(david_genes$V1, res_wt_CRISPR_nona_IDs,            genome.size=num_genes_inboth)
go_david <- testGeneOverlap(go_david)
print(go_david)
## Detailed information about this GeneOverlap object:
## listA size=30, e.g. ENSG00000175592 ENSG00000006327 ENSG00000148677
## listB size=1709, e.g. ENSG00000001561 ENSG00000001631 ENSG00000002586
## Intersection size=10, e.g. ENSG00000119922 ENSG00000161011 ENSG00000171223
## Union size=1729, e.g. ENSG00000175592 ENSG00000006327 ENSG00000148677
## Genome size=15014
## # Contingency Table:
##       notA inA
## notB 13285  20
## inB   1699  10
## Overlapping p-value=1.2e-03
## Odds ratio=3.9
## Overlap tested using Fisher's exact test (alternative=greater)
## Jaccard Index=0.0
getIntersection(go_david)
##  [1] "ENSG00000119922" "ENSG00000161011" "ENSG00000171223" "ENSG00000104856"
##  [5] "ENSG00000077150" "ENSG00000128965" "ENSG00000101255" "ENSG00000049249"
##  [9] "ENSG00000105499" "ENSG00000130513"
# Performing GeneOverlap analysis for DEG 97 genes 
#go_DEG <- newGeneOverlap(res_us_com$ENSEMBL, res_wt_CRISPR_nona_IDs,            genome.size=nrow(prot_ensg_ID))
go_DEG <- newGeneOverlap(res_us_com$ENSEMBL, res_wt_CRISPR_nona_IDs,            genome.size=num_genes_inboth)
go_DEG <- testGeneOverlap(go_DEG)
print(go_DEG)
## Detailed information about this GeneOverlap object:
## listA size=97, e.g. ENSG00000006128 ENSG00000006327 ENSG00000008517
## listB size=1709, e.g. ENSG00000001561 ENSG00000001631 ENSG00000002586
## Intersection size=13, e.g. ENSG00000008517 ENSG00000049249 ENSG00000077150
## Union size=1793, e.g. ENSG00000006128 ENSG00000006327 ENSG00000008517
## Genome size=15014
## # Contingency Table:
##       notA inA
## notB 13221  84
## inB   1696  13
## Overlapping p-value=0.31
## Odds ratio=1.2
## Overlap tested using Fisher's exact test (alternative=greater)
## Jaccard Index=0.0
getIntersection(go_DEG)
##  [1] "ENSG00000008517" "ENSG00000049249" "ENSG00000077150" "ENSG00000101255"
##  [5] "ENSG00000104856" "ENSG00000105499" "ENSG00000119922" "ENSG00000128965"
##  [9] "ENSG00000130513" "ENSG00000151012" "ENSG00000161011" "ENSG00000167767"
## [13] "ENSG00000171223"

Making a diagram of the overlap between treatment comparisons

Num_david <- nrow(david_genes)
Num_wt_CRISPR_IDs <- length(res_wt_CRISPR_nona_IDs)
Num_Overlap_wt_CRISPR_david <- sum(res_wt_CRISPR_nona_IDs %in% david_genes$V1)

library(VennDiagram)
grid.newpage()
draw.pairwise.venn(Num_david, Num_wt_CRISPR_IDs, Num_Overlap_wt_CRISPR_david, 
                   category = c("David Identified Genes\nCurrent Study", 
                                "WT vs. dCas9 Control\nLu et al."), 
                   lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), 
                   cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.11], polygon[GRID.polygon.12], polygon[GRID.polygon.13], polygon[GRID.polygon.14], text[GRID.text.15], text[GRID.text.16], lines[GRID.lines.17], text[GRID.text.18], lines[GRID.lines.19], text[GRID.text.20], text[GRID.text.21])
pdf("VennD_David_v_Lu.pdf", width=6, height = 4, family="ArialMT")
draw.pairwise.venn(Num_david, Num_wt_CRISPR_IDs, Num_Overlap_wt_CRISPR_david, 
                   category = c("David Identified Genes\nCurrent Study", 
                                "WT vs. dCas9 Control\nLu et al."), 
                   lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), 
                   cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.22], polygon[GRID.polygon.23], polygon[GRID.polygon.24], polygon[GRID.polygon.25], text[GRID.text.26], text[GRID.text.27], lines[GRID.lines.28], text[GRID.text.29], lines[GRID.lines.30], text[GRID.text.31], text[GRID.text.32])
dev.off()
## quartz_off_screen 
##                 2
# all 97 DEGs 
num_combined <- nrow(res_us_com)
Num_Overlap_wt_CRISPR_combined <- sum(res_wt_CRISPR_nona_IDs %in% res_us_com$ENSEMBL)

draw.pairwise.venn(num_combined, Num_wt_CRISPR_IDs, Num_Overlap_wt_CRISPR_combined, 
                   category = c("DEG Current Study", 
                                "WT vs. dCas9 Control\nLu et al."), 
                   lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), 
                   cat.dist = rep(0.025, 2))

## (polygon[GRID.polygon.33], polygon[GRID.polygon.34], polygon[GRID.polygon.35], polygon[GRID.polygon.36], text[GRID.text.37], text[GRID.text.38], lines[GRID.lines.39], text[GRID.text.40], lines[GRID.lines.41], text[GRID.text.42], text[GRID.text.43])
pdf("VennD_DEG_v_Lu.pdf", width=6, height = 4, family="ArialMT")
draw.pairwise.venn(num_combined, Num_wt_CRISPR_IDs, Num_Overlap_wt_CRISPR_combined, 
                   category = c("DEG Current Study", 
                                "WT vs. dCas9 Control\nLu et al."), 
                   lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), 
                   cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.44], polygon[GRID.polygon.45], polygon[GRID.polygon.46], polygon[GRID.polygon.47], text[GRID.text.48], text[GRID.text.49], lines[GRID.lines.50], text[GRID.text.51], lines[GRID.lines.52], text[GRID.text.53], text[GRID.text.54])
dev.off()
## quartz_off_screen 
##                 2

Now to MAplots highlighting the three genes of interest

color_genes <- c("#000000", "#FF0000", "#0432FF", "#548235", "#FF9300") 

# color genes
res_wt_CRISPR$color <- 1
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000174059"] <- 2 # CD34
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000104856"] <- 3 # Rel B
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000077150"] <- 1 # NFkb2
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000175197"] <- 5 # DDIT3
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000171223"] <- 6 # JunB
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000175592"] <- 1 # FOSL1
res_wt_CRISPR$color[rownames(res_wt_CRISPR)=="ENSG00000101255"] <- 1 # TRIB3

# make non-DE transparent
res_wt_CRISPR$trans <- 0.1
res_wt_CRISPR$trans[which(res_wt_CRISPR$padj < 0.05)] <- 1 

# make CD34/genes larger
res_wt_CRISPR$size <- 1
GOI <- c("ENSG00000174059", "ENSG00000175197", "ENSG00000171223", "ENSG00000104856")

#GOI <- c("ENSG00000174059", "ENSG00000101255", "ENSG00000077150", "ENSG00000175197",
#         "ENSG00000171223", "ENSG00000175592", "ENSG00000104856")

res_wt_CRISPR$size[rownames(res_wt_CRISPR) %in% GOI] <- 1.5

res_wt_CRISPR_df <- as.data.frame(res_wt_CRISPR)
res_wt_CRISPR_df$color <- factor(res_wt_CRISPR_df$color)
MAplot_wt_v_CRISPR_GOI <- ggplot(data = res_wt_CRISPR_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=color_genes, guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("WT vs. CRISPR") +
  theme_tufte()
MAplot_wt_v_CRISPR_GOI

ggsave(plot = MAplot_wt_v_CRISPR_GOI, filename = "MAplot_wt_v_CRISPR_GOI.pdf", width=6, height=4, useDingbats=FALSE)
# color genes
res_CRISPR_Stat3$color <- 1
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000174059"] <- 2 # CD34
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000104856"] <- 3 # Rel B
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000077150"] <- 1 # NFkb2
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000175197"] <- 5 # DDIT3
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000171223"] <- 6 # JunB
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000175592"] <- 1 # FOSL1
res_CRISPR_Stat3$color[rownames(res_CRISPR_Stat3)=="ENSG00000101255"] <- 1 # TRIB3

# make non-DE transparent
res_CRISPR_Stat3$trans <- 0.1
res_CRISPR_Stat3$trans[which(res_CRISPR_Stat3$padj < 0.05)] <- 1 

# make CD34/genes larger
res_CRISPR_Stat3$size <- 1
GOI <- c("ENSG00000174059", "ENSG00000175197", "ENSG00000171223", "ENSG00000104856")

res_CRISPR_Stat3$size[rownames(res_CRISPR_Stat3) %in% GOI] <- 1.5

res_CRISPR_Stat3_df <- as.data.frame(res_CRISPR_Stat3)
res_CRISPR_Stat3_df$color <- factor(res_CRISPR_Stat3_df$color)
MAplot_CRISPR_v_STAT3_GOI <- ggplot(data = res_CRISPR_Stat3_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=color_genes, guide=FALSE) +
  ylim(c(-15, 15)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("CRISPR vs. STAT3") +
  theme_tufte()
MAplot_CRISPR_v_STAT3_GOI

ggsave(plot = MAplot_CRISPR_v_STAT3_GOI, filename = "MAplot_CRISPR_v_STAT3_GOI.pdf", width=6, height=4, useDingbats=FALSE)
# color genes
res_wt_Stat3$color <- 1
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000174059"] <- 2 # CD34
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000104856"] <- 3 # Rel B
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000077150"] <- 1 # NFkb2
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000175197"] <- 5 # DDIT3
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000171223"] <- 6 # JunB
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000175592"] <- 1 # FOSL1
res_wt_Stat3$color[rownames(res_wt_Stat3)=="ENSG00000101255"] <- 1 # TRIB3

# make non-DE transparent
res_wt_Stat3$trans <- 0.1
res_wt_Stat3$trans[which(res_wt_Stat3$padj < 0.05)] <- 1 

# make CD34/genes larger
res_wt_Stat3$size <- 1
res_wt_Stat3$size[rownames(res_wt_Stat3) %in% GOI] <- 1.5

res_wt_Stat3_df <- as.data.frame(res_wt_Stat3)
res_wt_Stat3_df$color <- factor(res_wt_Stat3_df$color)
MAplot_wt_v_STAT3_GOI <- ggplot(data = res_wt_Stat3_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=color_genes, guide=FALSE) +
  ylim(c(-15, 15)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("wt vs. STAT3") +
  theme_tufte()
MAplot_wt_v_STAT3_GOI

ggsave(plot = MAplot_wt_v_STAT3_GOI, filename = "MAplot_wt_v_STAT3_GOI.pdf", width=6, height=4, useDingbats=FALSE)

Outputting the Session Info

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] scales_1.1.0                GeneOverlap_1.18.0         
##  [3] VennDiagram_1.6.20          futile.logger_1.4.3        
##  [5] ggthemes_4.2.0              ggplot2_3.3.0              
##  [7] RFmarkerDetector_1.0.1      AUCRF_1.1                  
##  [9] randomForest_4.6-14         UpSetR_1.4.0               
## [11] pheatmap_1.0.12             RColorBrewer_1.1-2         
## [13] genefilter_1.64.0           qvalue_2.14.1              
## [15] RUVSeq_1.16.1               edgeR_3.24.3               
## [17] limma_3.38.3                EDASeq_2.16.3              
## [19] ShortRead_1.40.0            GenomicAlignments_1.18.1   
## [21] Rsamtools_1.34.1            Biostrings_2.50.2          
## [23] XVector_0.22.0              DESeq2_1.22.2              
## [25] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         
## [27] BiocParallel_1.16.6         matrixStats_0.55.0         
## [29] Biobase_2.42.0              GenomicRanges_1.34.0       
## [31] GenomeInfoDb_1.18.2         IRanges_2.16.0             
## [33] S4Vectors_0.20.1            BiocGenerics_0.28.0        
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.4-1       hwriter_1.3.2          htmlTable_1.13.3      
##  [4] base64enc_0.1-3        rstudioapi_0.11        farver_2.0.3          
##  [7] bit64_0.9-7            AnnotationDbi_1.44.0   codetools_0.2-16      
## [10] splines_3.5.1          R.methodsS3_1.8.0      DESeq_1.34.1          
## [13] WilcoxCV_1.0-2         geneplotter_1.60.0     knitr_1.28            
## [16] Formula_1.2-3          annotate_1.60.1        cluster_2.1.0         
## [19] R.oo_1.23.0            compiler_3.5.1         httr_1.4.1            
## [22] backports_1.1.5        assertthat_0.2.1       Matrix_1.2-18         
## [25] formatR_1.7            acepack_1.4.1          htmltools_0.4.0       
## [28] prettyunits_1.1.1      tools_3.5.1            gtable_0.3.0          
## [31] glue_1.3.1             GenomeInfoDbData_1.2.0 reshape2_1.4.3        
## [34] dplyr_0.8.5            Rcpp_1.0.3             vctrs_0.2.4           
## [37] gdata_2.18.0           UsingR_2.0-6           rtracklayer_1.42.2    
## [40] xfun_0.12              stringr_1.4.0          lifecycle_0.2.0       
## [43] gtools_3.8.1           XML_3.99-0.3           HistData_0.8-6        
## [46] zlibbioc_1.28.0        MASS_7.3-51.5          aroma.light_3.12.0    
## [49] hms_0.5.3              lambda.r_1.2.4         yaml_2.2.1            
## [52] memoise_1.1.0          gridExtra_2.3          biomaRt_2.38.0        
## [55] rpart_4.1-15           latticeExtra_0.6-28    stringi_1.4.6         
## [58] RSQLite_2.2.0          checkmate_2.0.0        caTools_1.17.1.1      
## [61] GenomicFeatures_1.34.8 rlang_0.4.5            pkgconfig_2.0.3       
## [64] bitops_1.0-6           evaluate_0.14          lattice_0.20-40       
## [67] ROCR_1.0-7             purrr_0.3.3            labeling_0.3          
## [70] htmlwidgets_1.5.1      bit_1.1-15.2           tidyselect_1.0.0      
## [73] plyr_1.8.6             magrittr_1.5           R6_2.4.1              
## [76] gplots_3.0.3           Hmisc_4.3-1            DBI_1.1.0             
## [79] withr_2.1.2            pillar_1.4.3           foreign_0.8-76        
## [82] survival_3.1-11        RCurl_1.98-1.1         nnet_7.3-13           
## [85] tibble_2.1.3           crayon_1.3.4           futile.options_1.0.1  
## [88] KernSmooth_2.23-16     rmarkdown_2.1          progress_1.2.2        
## [91] locfit_1.5-9.1         data.table_1.12.8      blob_1.2.1            
## [94] digest_0.6.25          xtable_1.8-4           R.utils_2.9.2         
## [97] munsell_0.5.0